library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.1 ✓ dplyr 1.0.5
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 3.6.2
## Warning: package 'tibble' was built under R version 3.6.2
## Warning: package 'tidyr' was built under R version 3.6.2
## Warning: package 'readr' was built under R version 3.6.2
## Warning: package 'purrr' was built under R version 3.6.2
## Warning: package 'dplyr' was built under R version 3.6.2
## Warning: package 'forcats' was built under R version 3.6.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(here)
## Warning: package 'here' was built under R version 3.6.2
## here() starts at /Users/ryanharrington/Dropbox/00 - PCS Course/Example/covid_exploration
# Load the project's helper functions; the path is resolved against the here() root
source(file = here::here("functions.R"))
# Make theme_minimal() the default theme for every ggplot drawn afterwards
ggplot2::theme_set(ggplot2::theme_minimal())
Read in Data
Read in data
# URL of the US confirmed-cases time series (CSSEGISandData/COVID-19 repository)
confirmed <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv"
# URL of the US deaths time series (same repository)
deaths <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv"
# Read the confirmed-cases CSV and tag every row with the series it came from
confirmed_df <- dplyr::mutate(readr::read_csv(confirmed), Type = "Confirmed")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## iso2 = col_character(),
## iso3 = col_character(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Combined_Key = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
# Read the deaths CSV and tag every row with the series it came from
deaths_df <- dplyr::mutate(readr::read_csv(deaths), Type = "Deaths")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## iso2 = col_character(),
## iso3 = col_character(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Combined_Key = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
Create a population dataframe
# Keep only the identifying columns plus Population from the deaths data,
# so Population can be attached to the confirmed-cases data by key.
population_df <- dplyr::select(
  deaths_df,
  UID, iso2, iso3, code3, FIPS, Admin2, Province_State, Country_Region,
  Lat, Long_, Combined_Key, Population
)
Add population to confirmed
# Attach Population to the confirmed-cases rows by matching on every shared
# identifying column, then move Population next to the identifiers.
confirmed_df <- confirmed_df %>%
  dplyr::left_join(
    population_df,
    by = c(
      "UID", "iso2", "iso3", "code3", "FIPS", "Admin2", "Province_State",
      "Country_Region", "Lat", "Long_", "Combined_Key"
    )
  ) %>%
  dplyr::select(
    UID, iso2, iso3, code3, FIPS, Admin2, Province_State, Country_Region,
    Lat, Long_, Combined_Key, Population,
    dplyr::everything()
  )
Union confirmed cases and deaths together
# Combine both series into one table (the data-frame union keeps unique rows),
# then put the identifiers, Type and Population ahead of the remaining columns.
covid <- dplyr::union(confirmed_df, deaths_df) %>%
  dplyr::select(
    UID, iso2, iso3, code3, FIPS, Admin2, Province_State, Country_Region,
    Lat, Long_, Combined_Key, Type, Population,
    dplyr::everything()
  )
Check County Data
New Castle
# New Castle County rows. The state condition makes this check consistent with
# the Kent and Sussex checks and guards against a same-named county elsewhere.
covid %>%
  dplyr::filter(
    Admin2 == "New Castle",
    Province_State == "Delaware"
  )
Kent
# Kent County rows, limited to Delaware (the county name alone is not enough:
# a Kent / Maryland lookup appears later in this document).
covid %>%
  dplyr::filter(Admin2 == "Kent" & Province_State == "Delaware")
Sussex
# Sussex County rows, limited to Delaware
covid %>%
  dplyr::filter(Admin2 == "Sussex" & Province_State == "Delaware")
Delaware
# Every row whose Province_State is Delaware, regardless of county
dplyr::filter(covid, Province_State == "Delaware")
Toy functions
Tell me today’s date
With explicit return
# Build a greeting containing today's date (explicit-return version).
# Takes no arguments; returns a length-one character vector.
tell_me_todays_date <- function() {
  # Render the current date in the form "November 08, 2021"
  date_label <- format(Sys.Date(), format = "%B %d, %Y")
  greeting <- paste0("Hello! Today's date is ", date_label, "!")
  # Hand the finished greeting back explicitly
  return(greeting)
}
With implicit return
# Build a greeting containing today's date (implicit-return version).
# Takes no arguments; returns a length-one character vector.
tell_me_todays_date <- function() {
  # No return() call: the value of this last expression is the function's
  # result. The date is rendered in the form "November 08, 2021".
  paste0(
    "Hello! Today's date is ",
    format(Sys.Date(), format = "%B %d, %Y"),
    "!"
  )
}
# Call the function; the most recent definition of the name is the one that runs
tell_me_todays_date()
## [1] "Hello! Today's date is November 08, 2021!"
# Call it again: explicit and implicit return produce the same greeting
tell_me_todays_date()
## [1] "Hello! Today's date is November 08, 2021!"
Distance from the mean
# Fix the random seed so the sample below is reproducible
set.seed(1231)
# Draw 25 integers from 1..100 with replacement. TRUE is spelled out because
# the shorthand T is an ordinary variable that can be reassigned.
nums <- sample(1:100, 25, replace = TRUE)
# Print the sampled values
nums
## [1] 74 47 12 60 36 53 60 40 62 46 61 10 26 41 26 92 83 19 12
## [20] 84 38 11 74 43 100
# Distance of each sampled value from the sample mean, ignoring NAs in the mean.
# TRUE is spelled out because the shorthand T can be reassigned.
nums - mean(nums, na.rm = TRUE)
## [1] 25.6 -1.4 -36.4 11.6 -12.4 4.6 11.6 -8.4 13.6 -2.4 12.6 -38.4
## [13] -22.4 -7.4 -22.4 43.6 34.6 -29.4 -36.4 35.6 -10.4 -37.4 25.6 -5.4
## [25] 51.6
# Compute how far each element lies from the mean of the whole vector.
#
# Arguments:
#   values  A numeric vector. NAs are ignored when computing the mean.
# Returns:
#   A vector the same length as `values` holding `values - mean(values)`;
#   elements that are NA in `values` stay NA in the result.
calc_mean_dist <- function(values) {
  # TRUE is spelled out: the shorthand T is a plain variable, so code that
  # reassigns it (e.g. T <- 0) would silently switch NA removal off.
  values - mean(values, na.rm = TRUE)
}
# Same computation as the inline expression above, now through the function
calc_mean_dist(values = nums)
## [1] 25.6 -1.4 -36.4 11.6 -12.4 4.6 11.6 -8.4 13.6 -2.4 12.6 -38.4
## [13] -22.4 -7.4 -22.4 43.6 34.6 -29.4 -36.4 35.6 -10.4 -37.4 25.6 -5.4
## [25] 51.6
# The function works on any numeric vector, e.g. the integers 1 through 10
calc_mean_dist(1:10)
## [1] -4.5 -3.5 -2.5 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5
# ...or an arithmetic sequence: 1, 6, 11, ... up to at most 173
calc_mean_dist(seq(from = 1, to = 173, by = 5))
## [1] -85 -80 -75 -70 -65 -60 -55 -50 -45 -40 -35 -30 -25 -20 -15 -10 -5 0 5
## [20] 10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85
Function for filtering counties
# get_county() is not defined in the visible code (presumably it comes from
# functions.R, sourced near the top — verify). Called with the data and a county name.
get_county(covid, "New Castle")
get_county(covid, "Kent")
get_county(covid, "Sussex")
# NOTE(review): "All counties" looks like a sentinel meaning "no county filter" — confirm
get_county(covid, "All counties")
# A third argument is supplied here; presumably the state — confirm in get_county()
get_county(covid, "All counties", "Maryland")
Continue with Data Cleaning
# Each pipeline selects rows with get_county() and passes the result to
# tidy_covid(); neither helper is defined in the visible code (presumably functions.R).
# County only; no second argument is given, so get_county()'s own default applies
covid %>%
get_county("New Castle") %>%
tidy_covid()
# County plus a second argument ("Maryland") — presumably the state; confirm
covid %>%
get_county("Kent", "Maryland") %>%
tidy_covid()
covid %>%
get_county("Monmouth", "New Jersey") %>%
tidy_covid()
# NOTE(review): "All counties" looks like a sentinel meaning "no county filter" — confirm
covid %>%
get_county("All counties") %>%
tidy_covid()
Adjusting from cumulative to daily values
# Same call as at the end of the previous section.
# NOTE(review): the cumulative-to-daily adjustment presumably happens inside
# tidy_covid() — confirm in functions.R.
covid %>%
get_county("All counties") %>%
tidy_covid()
Graphing our data
# Line chart of Confirmed against Date, one colored line per Admin2 value
covid %>%
  get_county("All counties") %>%
  tidy_covid() %>%
  ggplot2::ggplot(
    ggplot2::aes(x = Date, y = Confirmed, color = Admin2, group = Admin2)
  ) +
  ggplot2::geom_line() +
  # Format the y-axis labels with scales::comma
  ggplot2::scale_y_continuous(labels = scales::comma) +
  ggplot2::theme(
    legend.position = "top",
    axis.title = ggplot2::element_text(face = "bold"),
    axis.text = ggplot2::element_text(face = "italic")
  ) +
  # An empty string blanks out the color legend's title
  ggplot2::labs(color = "")

# graph_covid_over_time() is not defined in the visible code (presumably functions.R).
# Confirmed_DoD is passed as a bare name; presumably a column produced by
# tidy_covid() — TODO confirm.
covid %>%
get_county("All counties") %>%
tidy_covid() %>%
graph_covid_over_time(Confirmed_DoD)
